import pandas as pd
import numpy as np

# Configure pandas console output so printed frames are never truncated:
# show every row and column, allow cell text up to 1000 characters, and keep
# wide frames on a single line instead of wrapping them across several blocks.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 1000)
pd.set_option('display.expand_frame_repr', False)

# Load the bike-sharing training data and preview the first five rows.
df = pd.read_csv(r'../../../../large_data/共享单车/train.csv')
print(df.head(5))

# Columns, as noted by the author:
#   datetime  season  holiday  workingday  weather  temp  atemp  humidity
#   windspeed  casual  registered  count
# atemp      - apparent ("feels-like") temperature? (unconfirmed)
# casual     - number of rentals by non-registered users
# registered - number of rentals by registered users
# count      - casual + registered

# Print the frequency of each value for the categorical-looking columns.
for _column in ('season', 'holiday', 'workingday', 'weather'):
    print(df[_column].value_counts())

# Compare `count` with `casual + registered` row by row and print the
# distinct outcomes of that comparison.
_rental_sum = df['casual'] + df['registered']
_count_matches = df['count'] == _rental_sum
print(np.unique(_count_matches))

from sklearn.model_selection import train_test_split
# Split the loaded frame into training and test partitions, with 75% of the
# rows going to training; the fixed random_state seeds the split.
df_train, df_test = train_test_split(
    df,
    random_state=666,
    train_size=0.75,
)
